# Survey-package settings for strata/domains that contain a single PSU.
# The exact variance adjustment is defined by the survey package (see its
# documentation for `survey.lonely.psu` and `survey.adjust.domain.lonely`).
options(survey.adjust.domain.lonely=TRUE)
options(survey.lonely.psu="adjust")
# Run date. NOTE(review): not referenced again in the visible part of the
# script (output file names call Sys.Date() directly) -- confirm it is needed.
date <- Sys.Date()
# NOTE(review): no ggrepel or nortest function is called in the visible part
# of the script -- confirm these two packages are still required.
library(ggrepel)
library(survey)
library(nortest)
library(dplyr)
library(ggplot2)
# Large scipen penalty so numbers print in fixed rather than scientific notation.
options(scipen = 999)




## ANALYSIS OF RESULTS IN TEST DATASETS, ALL ALGORITHMS, SAME PREDICTORS
## (result files sent by Manuel on 23/11)
 ## Predictors: sex, age, region, weight, height, sbp, dbp

setwd("~/Desktop/Artículos/STEPS/Results /Resultados 24_11/Resultados/")

  #1. Loading datasets: one global data frame per CSV, named after the file
# `pattern` is a regular expression, not a glob, so anchor on the extension.
datasets <- list.files(pattern = "\\.csv$")
# Fail early with a clear message instead of looping over 1:0.
if (length(datasets) == 0) {
  stop("No .csv result files found in ", getwd(), call. = FALSE)
}
for (csv_file in datasets) {
  assign(csv_file, read.csv(csv_file))
}
rm(datasets, csv_file)
length(unique(ls()))  ## 20 models

 #2. Adding a column with the name of the model (= the file name)
my_list <- mget(ls(pattern = "\\.csv$"))
my_list <- Map(cbind, my_list, model = names(my_list))
list2env(my_list, .GlobalEnv)
rm(my_list)

 #3. Pooling all models
# The two CNN outputs carry the prediction in column 25 under another name.
# NOTE(review): positional rename -- confirm column 25 is the prediction.
names(test_CNN_boxcox.csv)[25] <- "prediction"
names(test_CNN_standarize.csv)[25] <- "prediction"

# Keep every data frame currently in the workspace. is.data.frame() is used
# instead of `class(x) == "data.frame"`, which yields a length > 1 condition
# (an error in `if` since R 4.2) for objects with several classes.
workspace_objects <- mget(ls())
datalist <- unname(Filter(is.data.frame, workspace_objects))
data <- plyr::rbind.fill(datalist)
rm(list = setdiff(ls(), "data"))
summary(data)
length(unique(data$study_id)) # 21 datasets
length(unique(data$model)) #20 models

  #4. Individual-level difference between the INTERSALT estimate and the ML
  # prediction. That is: Difference = Estimation - prediction
data[["difference"]] <- data[["estimated_sodium_excretion_intersalt"]] - data[["prediction"]]

  #5. One data frame per model (named by model) for the survey-weighted step
model_names <- as.character(unique(data$model))
df_list <- lapply(
  setNames(model_names, model_names),
  function(model_name) data[which(data$model == model_name), ]
)
length(df_list) == length(unique(data$model)) # list of length = number of models

# Provides grouping_svy() and results_svy_mean_difference() used below.
source("~/Desktop/Artículos/STEPS/Scripts/Analysis algorithms Functions.R")
  #6. Survey-weighted mean difference (in each model, by country)
mean_difference <- lapply(
  lapply(df_list, grouping_svy),
  results_svy_mean_difference
)
rm(data, df_list)
# Tag each result table with its model name, then export the tables to the
# global environment under those names.
mean_difference <- Map(
  function(tbl, model_name) cbind(tbl, model = model_name),
  mean_difference,
  names(mean_difference)
)
list2env(mean_difference, envir = .GlobalEnv)
rm(mean_difference)

  #7. Stack every data frame found in the global environment to compare
  # models by sex
is_df <- sapply(.GlobalEnv, is.data.frame)
difference <- do.call(rbind, mget(names(is_df)[is_df]))

# Only the pooled table is needed from here on.
rm(list = setdiff(ls(), c("difference")))


difference$sex <- factor(
  difference$sex,
  levels = c(1, 2),
  labels = c("Men", "Women")
)

difference_men <- dplyr::filter(difference, sex == "Men")
difference_women <- dplyr::filter(difference, sex == "Women")

# Unweighted mean of the survey-level differences, per model.
men <- with(difference_men, tapply(difference, model, mean))
women <- with(difference_women, tapply(difference, model, mean))
both <- tapply(difference[["difference"]], difference[["model"]], mean)

## For sup tables: one long table with a sex label
men <- data.frame(template = names(men), mean = men)
men[["sex"]] <- "men"
women <- data.frame(template = names(women), mean = women)
women[["sex"]] <- "women"
both <- data.frame(template = names(both), mean = both)
both[["sex"]] <- "both sexes"

### Best models (plotted later): test_HGB_standarized.csv, test_MLP_boxcox.csv, test_CNN_standarize.csv

## Supp tab: Mean difference between observed and predicted salt intake by sex across all ML algorithms
sup <- rbind(both, men, women)
rownames(sup) <- NULL
sup$mean <- round(sup$mean, 4)
# Strip the file extension and the "test_" prefix from the model label.
sup$template <- gsub("test_", "", gsub(".csv", "", sup$template))
names(sup)[1:2] <- c(
  "ML algorithm",
  "Mean difference between observed \nand predicted mean salt intake"
)

write.csv(
  sup,
  paste0("~/Desktop/Artículos/STEPS/Supplementary materials/Mean_difference_observed_vs_predicted_all_algorithms_", Sys.Date(), ".csv"),
  row.names = FALSE
)

      ## For text in Supp materials: inspect the exported table by sex.
      ## NOTE(review): reads the file dated 2021-11-25, not the one written
      ## with today's Sys.Date() -- confirm this is intended.
data <- read.csv("~/Desktop/Artículos/STEPS/Supplementary materials/Mean_difference_observed_vs_predicted_all_algorithms_2021-11-25.csv")
View(data[which(data$sex == "both sexes"), ])
View(data[which(data$sex == "men"), ])
View(data[which(data$sex == "women"), ])


## Best three models for both sexes:
##   test_HGB_standarized.csv
##   test_MLP_boxcox.csv
##   test_CNN_standarize.csv

### Keep only the top 3 models
difference_figure <- dplyr::filter(
  difference,
  model %in% c("test_HGB_standarized.csv", "test_MLP_boxcox.csv", "test_CNN_standarize.csv")
)
# Axis label: "<country> - <year>"
difference_figure$study_id <- paste(difference_figure$country, difference_figure$data_year, sep = " - ")
# Strip the file extension and the "test_" prefix from the model label.
difference_figure$model <- gsub("test_", "", gsub(".csv", "", difference_figure$model))

# Relabel HGB as HuR for the figure legend.
difference_figure$model <- replace(
  difference_figure$model,
  difference_figure$model == "HGB_standarized",
  "HuR_standarized"
)

#Sup Fig: Comparison between mean difference between observed and predicted salt intake across the best algorithms
pdf(paste0("~/Desktop/Artículos/STEPS/Supplementary materials/Mean_difference_three_predictions_by_sex_", Sys.Date(), ".pdf"),
    width = 28,
    height = 17)
# The plot is print()ed explicitly: a ggplot object is only drawn when it is
# printed, and top-level auto-printing does not happen when the script is run
# through source(), which would leave an empty PDF.
print(
  ggplot(data = difference_figure,
         aes(x = study_id,
             y = difference,
             color =  model)) +
    geom_point(size = 5) +
    geom_errorbar(aes(ymin = ci_l,
                      ymax = ci_u),
                  width = 0.2) +
    facet_grid(~sex) +
    theme_light() +
    labs(
      y = "Mean difference between observed and predicted mean salt intake",
      x = ""
    ) + ylim (-2.5, 2) +
    theme(
      axis.title.y = element_text(size = 20),
      axis.text.x = element_text(size = 18, angle = 45),
      axis.text.y = element_text(size = 20),
      strip.text.x = element_text(size = 20),
      strip.text.y = element_text(size = 20),legend.text = element_text(size = 20),
      legend.title=element_blank()
    )  + geom_hline(yintercept=0, color = "red")  +  scale_color_brewer(palette ="Set1")
)
dev.off()

#Men:
  #CNN-> 8
  #MLP-> 4
  #HGB-> 9

#Women:
  #CNN-> 8
  #MLP-> 3
  #HGB-> 10

################################################################################################
###FIG 1 AND SUPP 1: MEAN SALT INTAKE
# Survey-weighted mean observed (INTERSALT) and ML-predicted salt intake, by
# survey and sex, from the HGB test-set predictions.
full_validate<-read.csv("~/Desktop/Artículos/STEPS/Results /Resultados 24:11/Resultados/test_HGB_standarized.csv")
d.s                          <- svydesign(id = ~psu+study_id, strata = ~stratum, weights = ~wstep3, data = full_validate, nest = TRUE)
# `TRUE` is spelled out: `T` is an ordinary variable that can be reassigned.
mean_salt_intake_validate_real  <- svyby(~estimated_sodium_excretion_intersalt,
                                         by = ~study_id+country+data_year+sex,
                                         design = d.s,
                                         FUN = svymean,
                                         vartype = c('ci'),
                                         na.rm.all = TRUE)
mean_salt_intake_validate_real$category<-"Observed"
# Column 5 (after the four `by` columns) holds the estimated mean.
colnames(mean_salt_intake_validate_real)[5]<-"mean_salt"

mean_salt_intake_validate_prediction  <- svyby(~prediction,
                                               by = ~study_id+country+data_year+sex,
                                               design = d.s,
                                               FUN = svymean,
                                               vartype = c('ci'),
                                               na.rm.all = TRUE)
mean_salt_intake_validate_prediction$category<-"ML predicted"
colnames(mean_salt_intake_validate_prediction)[5]<-"mean_salt"

# Long table: one row per survey x sex x category (observed / predicted).
mean_salt_intake_validate<-rbind(mean_salt_intake_validate_real, mean_salt_intake_validate_prediction)
rm(d.s)

mean_salt_intake_validate$sex <- factor(mean_salt_intake_validate$sex,
                                        levels = c(1,2),
                                        labels = c("Men", "Women"))

# Plotting copy with a "<country> - <year>" label on the x axis.
mean_salt_intake_validate_figure<-mean_salt_intake_validate
mean_salt_intake_validate_figure$study_id<-paste0(mean_salt_intake_validate_figure$country, " - ", mean_salt_intake_validate_figure$data_year)

pdf(paste0("~/Desktop/Artículos/STEPS/Figures/Fig1_Mean_salt_intake_observed_vs_ML-predicted_by_survey_", Sys.Date(), ".pdf"),
    width = 28,
    height = 17)
# The plot is print()ed explicitly: a ggplot object is only drawn when it is
# printed, and top-level auto-printing does not happen when the script is run
# through source(), which would leave an empty PDF.
print(
  ggplot(data = mean_salt_intake_validate_figure,
         aes(x = study_id,
             y = mean_salt,
             color = category)) +
    geom_point(size = 5) +
    geom_errorbar(aes(ymin = ci_l,
                      ymax = ci_u),
                  width = 0.7) +
    facet_grid(~sex) +
    theme_light() +
    ylim(6, 12) +
    labs(
      y = "Mean salt intake (g/day)",
      x = ""
    ) +
    theme(
      axis.title.y = element_text(size = 20),
      axis.text.x = element_text(size = 18, angle = 45),
      axis.text.y = element_text(size = 20),
      strip.text.x = element_text(size = 20),
      strip.text.y = element_text(size = 20),legend.text = element_text(size = 20),
      legend.title=element_blank()
    )   +
    scale_color_manual(values = c("#00AFBB", "#E7B800"))
)
dev.off()

rm(mean_salt_intake_validate_figure)
#


# Supplementary table behind Fig 1: sort, round to 2 decimals, relabel, export.
mean_salt_intake_validate <- arrange(
  mean_salt_intake_validate,
  country, data_year, sex, category
)
for (stat_col in c("mean_salt", "ci_l", "ci_u")) {
  mean_salt_intake_validate[[stat_col]] <- round(mean_salt_intake_validate[[stat_col]], 2)
}
rm(stat_col)
# Drop the survey identifier, then give the remaining columns display names.
mean_salt_intake_validate$study_id <- NULL
names(mean_salt_intake_validate) <- c(
  "Country", "Year", "Sex",
  "Mean salt intake (g/day)",
  "Mean salt intake (g/day) lower",
  "Mean salt intake (g/day) upper",
  "Category"
)
write.csv(
  mean_salt_intake_validate,
  paste0("~/Desktop/Artículos/STEPS/Supplementary materials/Mean_salt_intake_observed_and_ML-predicted_", Sys.Date(), ".csv"),
  row.names = FALSE
)

# For text: observed mean salt intake by survey, men and then women
View(mean_salt_intake_validate[which(mean_salt_intake_validate$Sex == "Men" & mean_salt_intake_validate$Category == "Observed"), ])
View(mean_salt_intake_validate[which(mean_salt_intake_validate$Sex == "Women" & mean_salt_intake_validate$Category == "Observed"), ])

####################################

# Clear the workspace before the next section.
rm(list = ls())


###Sup table: Mean difference between observed and predicted salt intake by sex in each survey included in the ML model development
# Survey-weighted mean of (INTERSALT estimate - ML prediction) per survey and
# sex, joined with the per-survey paired t-test p-values held in `a`.
mean_difference<-read.csv("~/Desktop/Artículos/STEPS/Results /Resultados 24:11/Resultados/test_HGB_standarized.csv")
mean_difference$difference<-mean_difference$estimated_sodium_excretion_intersalt-mean_difference$prediction ##calculate difference
d.s                          <- svydesign(id = ~psu+study_id, strata = ~stratum, weights = ~wstep3, data = mean_difference, nest = TRUE)
# `TRUE` is spelled out: `T` is an ordinary variable that can be reassigned.
difference_mean_RF <- svyby(~difference,
                               by = ~study_id+country+data_year+sex,
                               design = d.s,
                               FUN = svymean,
                               vartype = c('ci'),
                               na.rm.all = TRUE)
difference_mean_RF$category <-"Observed vs predicted"
rm(mean_difference, d.s)


difference_mean_RF<-arrange(difference_mean_RF, country, data_year, sex)
difference_mean_RF$sex <- factor(difference_mean_RF$sex,
                                    levels = c(1,2),
                                    labels = c("Men", "Women"))

names(difference_mean_RF)<-c("STEPS_ID", "Country", "Year", "Sex", "Mean difference (g/day)", "Mean difference (g/day) lower", "Mean difference (g/day) upper", "Category")

### join p-value (code below to get "a")
# `a` is built by the per-survey paired t-test section further down and is not
# defined when the script is run top to bottom; fail with a clear message.
if (!exists("a")) {
  stop("Object 'a' (per-survey p-values) not found: run the ",
       "'paired T-test across countries' section first.", call. = FALSE)
}
# Join keys are stated explicitly so that only survey and sex are matched.
difference_mean_RF<-inner_join(difference_mean_RF, a, by = c("STEPS_ID", "Sex"))
difference_mean_RF$STEPS_ID<-NULL
difference_mean_RF$`Mean difference (g/day)`<-round(difference_mean_RF$`Mean difference (g/day)`, 2)
difference_mean_RF$`Mean difference (g/day) lower`<-round(difference_mean_RF$`Mean difference (g/day) lower`, 2)
difference_mean_RF$`Mean difference (g/day) upper`<-round(difference_mean_RF$`Mean difference (g/day) upper`, 2)


write.csv(difference_mean_RF, paste0("~/Desktop/Artículos/STEPS/Supplementary materials/Mean_difference_observed_vs_ML-predicted_by_survey", Sys.Date() ,".csv"), row.names = FALSE)

#############################################
## POPULATION DEMOGRAPHICS IN THE TEST DATA SET
test<-read.csv("~/Desktop/Artículos/STEPS/Results /Resultados 24:11/Resultados/test_HGB_standarized.csv")
# NOTE(review): this design uses id = ~psu only, whereas the per-survey
# sections use id = ~psu+study_id -- confirm which is intended for the pooled
# estimates.
d.s                     <- svydesign(id = ~psu, strata = ~stratum, weights = ~wstep3, data = test, nest = TRUE)

#mean salt intake in all test dataset
# Each weighted mean is computed once and reused for its confidence interval.
prediction_mean <- svymean(~prediction,  design = d.s) #prediction
prediction_mean
confint(prediction_mean, level = 0.95)

# `TRUE` is spelled out: `T` is an ordinary variable that can be reassigned.
svyby(~prediction, by = ~sex,
      design = d.s,
      FUN = svymean,
      vartype = c('ci'),
      na.rm.all = TRUE)

observed_mean <- svymean(~estimated_sodium_excretion_intersalt,  design = d.s) #observed
observed_mean
confint(observed_mean, level = 0.95)

svyby(~estimated_sodium_excretion_intersalt, by = ~sex,
      design = d.s,
      FUN = svymean,
      vartype = c('ci'),
      na.rm.all = TRUE)

# Remove the two helper objects so the workspace matches what later sections
# expect.
rm(prediction_mean, observed_mean)



####################################

# Paired t-test, observed (INTERSALT) vs ML-predicted salt intake:
# whole test set first, then men (sex == 1) and women (sex == 2).
test <- read.csv("~/Desktop/Artículos/STEPS/Results /Resultados 24:11/Resultados/test_HGB_standarized.csv")

t.test(test$estimated_sodium_excretion_intersalt, test$prediction, paired = TRUE)

test_men <- test %>% filter(sex == 1)
test_women <- test %>% filter(sex == 2)

t.test(test_men$estimated_sodium_excretion_intersalt, test_men$prediction, paired = TRUE)
t.test(test_women$estimated_sodium_excretion_intersalt, test_women$prediction, paired = TRUE)

##paired  T-test across countries
# One paired t-test (observed INTERSALT estimate vs ML prediction) per survey
# and sex. The 42 results are collected directly into `pooleddata` rather than
# by scanning the workspace for data frames: that scan picked up unrelated
# objects (e.g. `test_men`, `test_women`) and errors on R >= 4.2 when an
# object such as the survey design `d.s` has more than one class.
test_original<-read.csv("~/Desktop/Artículos/STEPS/Results /Resultados 24:11/Resultados/test_HGB_standarized.csv")

# Surveys included in the test set (21 surveys).
study_ids <- c(
  "ARM_2016_STEPS_v01", "AZE_2017_STEPS_v01", "BGD_2018_STEPS_v01",
  "BLR_2016_STEPS_v01", "BRN_2015_STEPS_v01", "BTN_2014_STEPS_v01",
  "BTN_2019_STEPS_v01", "JOR_2019_STEPS_v01", "LBN_2017_STEPS_v01",
  "MAR_2017_STEPS_v01", "MNG_2013_STEPS_v01", "MNG_2019_STEPS_v01",
  "MWI_2017_STEPS_v01", "NPL_2019_STEPS_v01", "SDN_2016_STEPS_v01",
  "SLB_2015_STEPS_v01", "TKL_2014_STEPS_v01", "TKM_2018_STEPS_v01",
  "ZMB_2017_STEPS_v01", "CHL_2017_ENS", "TON_2017_STEPS_v01"
)

# Paired t-test p-value for one survey/sex subgroup, returned as a one-row
# data frame with columns p.value, study_id, sex.
paired_p_value <- function(df, id, sex_code) {
  grp <- df[which(df$study_id == id & df$sex == sex_code), ]
  data.frame(
    p.value = t.test(grp$estimated_sodium_excretion_intersalt,
                     grp$prediction,
                     paired = TRUE)$p.value,
    study_id = grp$study_id[1],
    sex = grp$sex[1]
  )
}

# Sex varies fastest: survey 1 men, survey 1 women, survey 2 men, ...
groups <- expand.grid(sex = c(1, 2), study_id = study_ids,
                      stringsAsFactors = FALSE)
p_values <- Map(
  function(id, sex_code) paired_p_value(test_original, id, sex_code),
  groups$study_id,
  groups$sex
)
pooleddata <- plyr::rbind.fill(unname(p_values))
# As before, leave only the pooled p-value table in the workspace.
rm(list=setdiff(ls(), c("pooleddata")))

a<-pooleddata
a$p.value<-round(a$p.value, 4)
#a$p.value<- pvalue(a$p.value, accuracy = 0.05, add_p = TRUE)
a$sex <- factor(a$sex,
                levels = c(1,2),
                labels = c("Men", "Women"))
# Rename the key columns to match the mean-difference table they are joined to.
names(a)[names(a) == "study_id"]<-"STEPS_ID"
names(a)[names(a) == "sex"]<-"Sex"

# Then the same for each country, subsetting by each STEPS ID.
# The p of 0.06 goes in the text; all per-STEPS p-values go in a supplementary
# or main table.
# "The most negative, the most positive and the closest-to-0 difference", each
# with its p-value.
rm(list = ls())
# Reading the table for the text.
# NOTE(review): reads the file dated 2021-11-25, not the one written with
# today's Sys.Date() -- confirm this is intended.
data <- read.csv("~/Desktop/Artículos/STEPS/Supplementary materials/Mean_difference_observed_vs_ML-predicted_by_survey2021-11-25.csv")
# Men: non-negative differences, negative differences, all rows.
View(data[which(data$Sex == "Men" & data$Mean.difference..g.day. >= 0), ])
View(data[which(data$Sex == "Men" & data$Mean.difference..g.day. < 0), ])
View(data[which(data$Sex == "Men"), ])
# Women: non-negative differences, negative differences, all rows.
View(data[which(data$Sex == "Women" & data$Mean.difference..g.day. >= 0), ])
View(data[which(data$Sex == "Women" & data$Mean.difference..g.day. < 0), ])
View(data[which(data$Sex == "Women"), ])

#########################################################################################################

##Rebuttal letter: sub-analysis by  age, BP and BMI values, hypertension.
#Age < 30 years vs Age ≥30 years
# BMI <18.5 kg/m2 vs BMI 18.5-24.9 kg/m2 vs BMI 25.0-29.9 kg/m2 vs BMI ≥30 kg/m2
# Hypertension (≥140/90 mmHg) vs No hypertension
data<-read.csv("~/Desktop/Artículos/STEPS/Results /Resultados 24:11/Resultados/test_HGB_standarized.csv")
summary(data)
data$age_group<- ifelse(data$age>=30, ">=30", "<30")
table(data$age, data$age_group)
# data.table is called through its namespace because this script never
# attaches the package with library().
data<-data.table::data.table(data)
# Unweighted mean observed and predicted salt intake per age group.
age<-data[, list(estimated_sodium_excretion_intersalt=round(mean(estimated_sodium_excretion_intersalt),1),
                 prediction=round(mean(prediction),1)),
            by=list(age_group)]

data$bmi<-as.numeric(data$bmi)
# BMI categories; each later test is only reached when the earlier ones are
# FALSE, so the lower bounds are implied. The top category is labelled ">=30"
# to match its condition (it was previously labelled ">30").
data$bmi_cat<- ifelse(data$bmi<18.5, "<18.5",
                      ifelse(data$bmi<25, "18.5-24.9",
                             ifelse(data$bmi<30, "25.0-29.9",
                                    ifelse(data$bmi>=30, ">=30",NA ))))
bmi<-data[, list(estimated_sodium_excretion_intersalt=round(mean(estimated_sodium_excretion_intersalt),1),
                 prediction=round(mean(prediction),1)),
          by=list(bmi_cat)]

# Raised blood pressure: SBP >= 140 or DBP >= 90.
data$raised_BP<- ifelse(data$sbp >=140 | data$dbp >=90, "raised BP",
                      ifelse(data$sbp <140 & data$dbp <90, "not raised BP", NA))
table(data$raised_BP, useNA = c("always"))

raised_BP<-data[, list(estimated_sodium_excretion_intersalt=round(mean(estimated_sodium_excretion_intersalt),1),
                 prediction=round(mean(prediction),1)),
          by=list(raised_BP)]
# The three summary tables are for interactive inspection only.
rm(age, bmi, raised_BP)

  # Independent t-test in each sub-group: compare mean salt between the groups,
  # e.g. <30 vs >=30 (BMI is handled with ANOVA below)
younger_30<-filter(data, age <30)
older_30<-filter(data, age >=30)

t.test(younger_30$estimated_sodium_excretion_intersalt, older_30$estimated_sodium_excretion_intersalt)
t.test(younger_30$prediction, older_30$prediction)

    #BP
raised_bp<-filter(data, raised_BP == "raised BP")
no_raised_bp<-filter(data, raised_BP == "not raised BP")

t.test(raised_bp$estimated_sodium_excretion_intersalt, no_raised_bp$estimated_sodium_excretion_intersalt)
t.test(raised_bp$prediction, no_raised_bp$prediction)

# Compute the analysis of variance, for BMI groups
res.aov <- aov(estimated_sodium_excretion_intersalt ~ bmi_cat, data = data)
summary(res.aov)

#########################################################################################################



##Sup table: Observed mean salt intake (g/day) by equation and sex in each survey included in the ML model development.
full_validate<-read.csv("~/Desktop/Artículos/STEPS/Results /Resultados 24:11/Resultados/test_HGB_standarized.csv")
d.s                          <- svydesign(id = ~psu+study_id, strata = ~stratum, weights = ~wstep3, data = full_validate, nest = TRUE)

# Column holding the estimate for each equation; the element name becomes the
# suffix of the "Observed_<equation>" category label.
equation_columns <- c(
  intersalt = "estimated_sodium_excretion_intersalt",
  kawasaki  = "estimated_sodium_excretion_kawasaki",
  tanaka    = "estimated_sodium_excretion_tanaka",
  toft      = "estimated_sodium_excretion_toft"
)

# Survey-weighted mean (with CI) by survey and sex, once per equation, instead
# of four copy-pasted svyby() calls. `TRUE` replaces the reassignable `T`.
per_equation <- lapply(names(equation_columns), function(equation) {
  res <- svyby(reformulate(equation_columns[[equation]]),
               by = ~study_id+country+data_year+sex,
               design = d.s,
               FUN = svymean,
               vartype = c('ci'),
               na.rm.all = TRUE)
  res$category <- paste0("Observed_", equation)
  # Column 5 (after the four `by` columns) holds the estimated mean.
  colnames(res)[5] <- "mean_salt"
  res
})

mean_salt_intake_validate <- do.call(rbind, per_equation)
rm(d.s)

mean_salt_intake_validate$sex <- factor(mean_salt_intake_validate$sex,
                                        levels = c(1,2),
                                        labels = c("Men", "Women"))


mean_salt_intake_validate<-arrange(mean_salt_intake_validate, country, data_year, sex,  category)
mean_salt_intake_validate$study_id<-NULL
mean_salt_intake_validate$mean_salt<-round(mean_salt_intake_validate$mean_salt, 2)
mean_salt_intake_validate$ci_l<-round(mean_salt_intake_validate$ci_l, 2)
mean_salt_intake_validate$ci_u<-round(mean_salt_intake_validate$ci_u, 2)

names(mean_salt_intake_validate)<-c("Country", "Year", "Sex", "Mean salt intake (g/day)", "Mean salt intake (g/day) lower", "Mean salt intake (g/day) upper", "Category")
write.csv(mean_salt_intake_validate, paste0("~/Desktop/Artículos/STEPS/Supplementary materials/Mean_salt_intake_all_formulas_", Sys.Date() ,".csv"), row.names = FALSE)

rm(list=ls())
#Reading the table for the text:
# NOTE(review): reads the file dated 2021-11-25, not the one written with
# today's Sys.Date() -- confirm this is intended.
data<-read.csv("~/Desktop/Artículos/STEPS/Supplementary materials/Mean_salt_intake_all_formulas_2021-11-25.csv")
View(data[which(data$Mean.salt.intake..g.day.<=5),])

#########################################################################################################
#########################################################################################################

###########################
###  ANALYSIS 54 LMICs (where we applied the model) ####
###########################

setwd("~/Desktop/Artículos/STEPS/Results /Predictions 25:11/")

## Weighted distribution in 54 surveys

# 1. Load the pooled dataset.
data <- read.csv("~/Desktop/Artículos/STEPS/Data/Extraction_model_application_pooleddata_2021-11-29.csv")

# 2. Split it into one data frame per survey, in order of first appearance.
#    Elements are stored by *name*: with a numeric study_id, `df_list[[i]]`
#    would index by position and pad the list with NULL entries.
df_list <- list()
for (i in unique(data$study_id)) {
  df_list[[as.character(i)]] <- data[which(data$study_id == i), ]
}
length(df_list) == length(unique(data$study_id)) # list of length = number of surveys

# 3. Load the helper functions and apply `grouping_svy_applied` (defined in
#    the sourced file) to every survey.
source("~/Desktop/Artículos/STEPS/Scripts/Analysis algorithms Functions.R")
df_list <- lapply(df_list, grouping_svy_applied)

# 4. Per-survey summaries for the supplementary tables. Note that `sample`
#    masks base::sample() in the global environment from here on.
sample <- lapply(df_list, n_sample)
sbp <- lapply(df_list, supp_svy_means, var = "SBP")
dbp <- lapply(df_list, supp_svy_means, var = "DBP")
weight <- lapply(df_list, supp_svy_means, var = "weight")
height <- lapply(df_list, supp_svy_means, var = "height")
age <- lapply(df_list, supp_svy_means, var = "AGE")
proportion_men <- lapply(df_list, supp_svy_props, var = "SEX")

# Unweighted region and min/max of each variable per survey. data.table is
# not attached in the visible script header, so the constructor is
# namespace-qualified to work whether or not library(data.table) was called.
data <- data.table::data.table(data)
other <- data[
  ,
  list(
    region = first(region),
    age_min = round(min(age)), age_max = round(max(age)),
    sbp_min = round(min(sbp)), sbp_max = round(max(sbp)),
    dbp_min = round(min(dbp)), dbp_max = round(max(dbp)),
    weight_min = min(weight), weight_max = max(weight),
    height_min = min(height), height_max = max(height)
  ),
  by = list(country, study_id, data_year)
]

# Collapse each per-survey list of results into a data frame (one row per
# survey, in the order of the survey list).
sample <- as.data.frame(do.call(rbind, sample))
sbp <- as.data.frame(do.call(rbind, sbp))
dbp <- as.data.frame(do.call(rbind, dbp))
age <- as.data.frame(do.call(rbind, age))
proportion_men <- as.data.frame(do.call(rbind, proportion_men))
weight <- as.data.frame(do.call(rbind, weight))
height <- as.data.frame(do.call(rbind, height))

# Put the survey ids next to the summaries. This relies on
# unique(data$study_id) being in the same order as the survey list.
sup <- cbind(
  c(unique(data$study_id)),
  sample, proportion_men, sbp, dbp, age, weight, height
)
names(sup) <- c(
  "study_id", "sample", "proportion_men", "sbp",
  "dbp", "age", "weight",
  "height"
)

# Attach region and min/max values, then keep the columns in table order.
sup <- merge(other, sup, by = "study_id")
sup <- select(
  sup,
  country, data_year, region, sample,
  age, age_min, age_max,
  proportion_men,
  sbp, sbp_min, sbp_max,
  dbp, dbp_min, dbp_max,
  weight, weight_min, weight_max,
  height, height_min, height_max
)
rm(other, age, proportion_men, sbp, dbp, weight, height, sample, df_list)

# Rounding: decimals per column; the proportion of men is shown as a percentage.
round_digits <- c(
  age = 0, sbp = 0, dbp = 0,
  weight = 1, weight_min = 1, weight_max = 1,
  height = 2, height_min = 2, height_max = 2
)
for (column_name in names(round_digits)) {
  sup[[column_name]] <- round(sup[[column_name]], round_digits[[column_name]])
}
sup$proportion_men <- round(sup$proportion_men * 100, 1)

# Display strings: "min-max" for age and "mean (min-max)" for the rest.
sup$age_range <- paste0(sup$age_min, "-", sup$age_max)
for (measure in c("sbp", "dbp", "weight", "height")) {
  sup[[measure]] <- paste0(
    sup[[measure]],
    " (", sup[[paste0(measure, "_min")]],
    "-", sup[[paste0(measure, "_max")]], ")"
  )
}

# Final column set (the stray trailing comma of the original call is removed).
sup <- select(
  sup,
  country, data_year, region, sample, age, age_range,
  proportion_men, sbp, dbp, weight, height
)

names(sup) <- c(
  "Country", "Year", "Region", "Sample size", "Mean age (years)",
  "Age range (years)", "Proportion of men (%)",
  "Mean, minimum and maximum \nvalues of SBP (mmHg)",
  "Mean, minimum and maximum \nvalues of DBP (mmHg)",
  "Mean, minimum and maximum \nvalues of weight (kg)",
  "Mean, minimum and maximum \nvalues of height (m)"
)

# NOTE(review): "distributiony" in the file name looks like a typo; it is kept
# unchanged so previously generated files keep the same naming.
write.csv(
  sup,
  paste0("~/Desktop/Artículos/STEPS/Supplementary materials/Weighted_distributiony_in_54_STEPS_", Sys.Date(), ".csv"),
  row.names = FALSE
)




### FIG 2 AND SUPP: MEAN SALT INTAKE

# 1. Load the dataset with the HGB predictions.
# NOTE(review): setwd() points at ".../Predictions 25:11/" while the file is
# read from ".../Predictions 25_11/"; confirm which folder name is correct.
setwd("~/Desktop/Artículos/STEPS/Results /Predictions 25:11/")
data <- read.csv("~/Desktop/Artículos/STEPS/Results /Predictions 25_11/predictions_HGB.csv")

# 2. Split it into one data frame per survey, in order of first appearance.
#    Elements are stored by *name*: with a numeric study_id, `df_list[[i]]`
#    would index by position and pad the list with NULL entries.
df_list <- list()
for (i in unique(data$study_id)) {
  df_list[[as.character(i)]] <- data[which(data$study_id == i), ]
}
length(df_list) == length(unique(data$study_id)) # list of length = number of surveys

# 3. Load the helper functions and apply `grouping_svy_applied` (defined in
#    the sourced file) to every survey.
source("~/Desktop/Artículos/STEPS/Scripts/Analysis algorithms Functions.R")
df_list <- lapply(df_list, grouping_svy_applied)

# 4. Mean salt intake per survey: by sex and overall.
mean_salt_intake_sex <- lapply(df_list, results_svy_mean_salt_sex)
mean_salt_intake_total <- lapply(df_list, results_svy_mean_salt_total)

# Sex-specific results: the estimate is the 5th column.
mean_salt_intake_sex <- bind_rows(mean_salt_intake_sex)
mean_salt_intake_sex$category <- "predicted"
names(mean_salt_intake_sex)[5] <- "mean_salt"

# Overall results: the estimate is the 4th column; sex code 3 marks "Total".
mean_salt_intake_total <- bind_rows(mean_salt_intake_total)
mean_salt_intake_total$category <- "predicted"
names(mean_salt_intake_total)[4] <- "mean_salt"
mean_salt_intake_total$sex <- 3

# Join sex-specific and total rows, then label the sex codes.
mean_salt_intake_sex <- rbind(mean_salt_intake_sex, mean_salt_intake_total)
rm(mean_salt_intake_total)
mean_salt_intake_sex$sex <- factor(
  mean_salt_intake_sex$sex,
  levels = c(1, 2, 3),
  labels = c("Men", "Women", "Total")
)

## Plot figure 2: Predicted mean salt intake by survey and sex
mean_salt_intake_validate_figure <- mean_salt_intake_sex

# Rename countries before joining the ISO-3 codes
# (presumably to match the spelling in the lookup file - TODO confirm).
iso_country_names <- c(
  "British Virgin Islands" = "Virgin Islands, British",
  "Cabo Verde" = "Cape Verde",
  "United Republic of Tanzania" = "Tanzania, United Republic of",
  "Republic of Moldova" = "Moldova, Republic of",
  "Eswatini" = "Swaziland"
)
for (survey_name in names(iso_country_names)) {
  mean_salt_intake_validate_figure$country[
    mean_salt_intake_validate_figure$country == survey_name
  ] <- iso_country_names[[survey_name]]
}

## Joining ISO-3 codes; the key is stated explicitly instead of relying on a
## natural join over whatever columns happen to be shared.
iso3 <- read.csv("~/Desktop/Artículos/CRA Mortality Diabetes LAC/00.Data/countries_codes_and_coordinates.csv")
iso3 <- select(iso3, Country, Alpha.3.code)
colnames(iso3)[1] <- "country"

mean_salt_intake_validate_figure <- left_join(
  mean_salt_intake_validate_figure, iso3,
  by = "country"
)
length(unique(mean_salt_intake_validate_figure$study_id)) # 54 datasets

# The axis label of each survey is "<ISO3> - <year>".
mean_salt_intake_validate_figure$study_id <- paste0(
  mean_salt_intake_validate_figure$Alpha.3.code,
  " - ",
  mean_salt_intake_validate_figure$data_year
)

# Attach the overall ("Total") mean to every row so surveys can be ordered by it.
both <- filter(mean_salt_intake_validate_figure, sex == "Total")
both[["sex"]] <- NULL
both[["ci_u"]] <- NULL
both[["ci_l"]] <- NULL
names(both)[4] <- "mean_total"
mean_salt_intake_validate_figure <- left_join(mean_salt_intake_validate_figure, both)

# Build the plot before opening the device, then print() it explicitly:
# top-level auto-printing does not happen when the script is run via source(),
# which would leave the PDF empty.
fig2_plot <- ggplot(
  data = mean_salt_intake_validate_figure,
  aes(
    x = mean_salt,
    y = reorder(study_id, -mean_total),
    color = sex
  )
) +
  geom_point(size = 5) +
  geom_errorbar(
    aes(xmin = ci_l, xmax = ci_u),
    width = 0.7
  ) +
  # facet_grid(~sex) +
  theme_light() +
  xlim(6.4, 11.5) +
  labs(
    y = "",
    x = "Mean salt intake (g/day)"
  ) +
  theme(
    axis.title.y = element_text(size = 20),
    axis.text.y = element_text(size = 20),
    axis.text.x = element_text(size = 20),
    axis.title.x = element_text(size = 20),
    strip.text.x = element_text(size = 20),
    strip.text.y = element_text(size = 20),
    legend.text = element_text(size = 20),
    legend.title = element_blank(),
    legend.position = c(0.87, 0.87)
  ) +
  scale_color_manual(values = c("blue", "red", "darkgreen"))

pdf(
  paste0("~/Desktop/Artículos/STEPS/Figures/Fig2_Mean_salt_intake_predicted_by_survey_in_54_LMICs_", Sys.Date(), ".pdf"),
  width = 25,
  height = 17
)
print(fig2_plot)
dev.off()

# The plotting copy is no longer needed.
rm(mean_salt_intake_validate_figure)

# Supplementary table: sort, round the estimate and its CI bounds to two
# decimals, rename the columns by position and drop the survey id.
mean_salt_intake_sex <- arrange(
  mean_salt_intake_sex,
  country, data_year, sex, category
)
for (estimate_col in c("mean_salt", "ci_l", "ci_u")) {
  mean_salt_intake_sex[[estimate_col]] <-
    round(mean_salt_intake_sex[[estimate_col]], 2)
}
names(mean_salt_intake_sex) <- c(
  "STEPS_ID", "Country", "Year", "Sex",
  "Mean salt intake (g/day)",
  "Mean salt intake (g/day) lower",
  "Mean salt intake (g/day) upper",
  "Category"
)
mean_salt_intake_sex[["STEPS_ID"]] <- NULL

write.csv(
  mean_salt_intake_sex,
  paste0("~/Desktop/Artículos/STEPS/Supplementary materials/Mean_salt_intake_predicted_by_survey_in_54_LMICs_", Sys.Date() ,".csv"),
  row.names = FALSE
)

## Explore predicted results by country
# data.table is not attached in the visible script header, so the constructor
# is namespace-qualified to work whether or not library(data.table) was called.
data <- data.table::data.table(data)

# First region value recorded for each country.
who_region <- data[, list(who_region = first(region)), by = list(country)]
names(who_region)[1] <- "Country"

# Explicit join key instead of an implicit natural join.
mean_salt_intake_sex <- left_join(mean_salt_intake_sex, who_region, by = "Country")
rm(who_region)

# One table per sex category for interactive inspection.
mean_salt_intake_total <- filter(mean_salt_intake_sex, Sex == "Total")
mean_salt_intake_men <- filter(mean_salt_intake_sex, Sex == "Men")
mean_salt_intake_women <- filter(mean_salt_intake_sex, Sex == "Women")

# Clear the workspace before inspecting the exported table.
rm(list = ls())
# Reading the table for the text.
# NOTE(review): the file name carries a fixed date (2021-11-29), whereas the
# export above stamps Sys.Date(); confirm this is the intended snapshot.
data <- read.csv("~/Desktop/Artículos/STEPS/Supplementary materials/Mean_salt_intake_predicted_by_survey_in_54_LMICs_2021-11-29.csv")
# Joining with income and region
income <- read.csv("~/Desktop/Artículos/STEPS/Supplementary materials/Countries_by_income_2021-11-22.csv")
data <- merge(data, income, by = c("Country", "Year"))
data[["Mean.salt.intake..g.day."]] <- round(data[["Mean.salt.intake..g.day."]], 1)
# Interactive inspection, one viewer per sex category.
View(subset(data, Sex == "Men"))
View(subset(data, Sex == "Women"))
View(subset(data, Sex == "Total"))
####################################

### SVY SALT INTAKE FOR ALL DATA (for the main results in text)
# Pooled predictions summarised with a design that declares no clusters,
# strata or weights (id = ~1, strata = NULL, weights = NULL).
data <- read.csv("~/Desktop/Artículos/STEPS/Results /Predictions 25:11/predictions_HGB.csv")
d.s <- svydesign(id = ~1, strata = NULL, weights = NULL, data = data, nest = TRUE)

# Year
table(data$data_year, data$study_id)

# Age
age_mean <- svymean(~age, design = d.s)
age_mean
confint(age_mean, level = 0.95)

# Proportion of women
svyciprop(~sex == 2, design = d.s)

# Mean BP
sbp_mean <- svymean(~sbp, design = d.s)
sbp_mean
confint(sbp_mean, level = 0.95)

dbp_mean <- svymean(~dbp, design = d.s)
dbp_mean
confint(dbp_mean, level = 0.95)

## Mean weight and height
weight_mean <- svymean(~weight, design = d.s)
weight_mean
confint(weight_mean, level = 0.95)

height_mean <- svymean(~height, design = d.s)
height_mean
confint(height_mean, level = 0.95)

## Mean salt: by sex (rounded to one decimal) and overall
mean_salt_sex <- svyby(
  formula = ~prediction,
  by = ~sex,
  design = d.s,
  FUN = svymean,
  vartype = "ci",
  na.rm = TRUE
)
for (estimate_col in c("prediction", "ci_l", "ci_u")) {
  mean_salt_sex[[estimate_col]] <- round(mean_salt_sex[[estimate_col]], 1)
}

prediction_mean <- svymean(~prediction, design = d.s)
prediction_mean
confint(prediction_mean, level = 0.95)


























# Same summaries as above, now with the sampling information declared:
# cluster ids psu + study_id, strata `stratum`, weights `wstep2`.
d.s <- svydesign(
  id = ~psu + study_id,
  strata = ~stratum,
  weights = ~wstep2,
  data = data,
  nest = TRUE
)

## Demographics
# Year
table(data$data_year, data$study_id)

# Age
age_mean <- svymean(~age, design = d.s)
age_mean
confint(age_mean, level = 0.95)

# Proportion of women
svyciprop(~sex == 2, design = d.s)

# Mean BP
sbp_mean <- svymean(~sbp, design = d.s)
sbp_mean
confint(sbp_mean, level = 0.95)

dbp_mean <- svymean(~dbp, design = d.s)
dbp_mean
confint(dbp_mean, level = 0.95)

## Mean weight and height
weight_mean <- svymean(~weight, design = d.s)
weight_mean
confint(weight_mean, level = 0.95)

height_mean <- svymean(~height, design = d.s)
height_mean
confint(height_mean, level = 0.95)

### MEAN SALT: by sex (rounded to one decimal) and overall
mean_salt_sex <- svyby(
  formula = ~prediction,
  by = ~sex,
  design = d.s,
  FUN = svymean,
  vartype = "ci",
  na.rm = TRUE
)
for (estimate_col in c("prediction", "ci_l", "ci_u")) {
  mean_salt_sex[[estimate_col]] <- round(mean_salt_sex[[estimate_col]], 1)
}

prediction_mean <- svymean(~prediction, design = d.s)
prediction_mean
confint(prediction_mean, level = 0.95)





#########
# Comparison with GBD estimates
# Start from a previously exported comparison table; keep the country, the
# year of the global estimate and the "estimate (lower-upper)" text column.
comparison <- read.csv("~/Desktop/Artículos/STEPS/Supplementary materials/Comparison_predictions_vs_global_results_2021-07-26.csv")
comparison <- select(comparison, 1, 4:5)

# Split the text column into three pieces (value, lower, upper); the pieces
# stay character here. Warnings raised by separate() are suppressed.
comparison <- suppressWarnings(
  separate(
    comparison,
    col = Estimated.mean.salt..intake.and.95..CI..g.day.,
    into = paste0("value", 1:3),
    sep = "[^[:digit:]?\\.]+"
  )
)
names(comparison)[3:5] <- c("gbd.val", "gbd.lower", "gbd.upper")

# Adding 5 countries: Namibia, Niger, Marshall Islands, Mozambique, and Uruguay
# ("faltantes" = the missing ones).
faltantes <- data.frame(
  Country = c("Namibia", "Niger", "Marshall Islands", "Mozambique", "Uruguay"),
  Year..for..global.estimatates. = rep(2010, 5),
  gbd.val = c(2.64, 2.92, 2.55, 2.24, 2.73),
  gbd.lower = c(2.24, 2.48, 2.15, 1.86, 2.32),
  gbd.upper = c(3.09, 3.44, 3.01, 2.65, 3.18)
)
# Scale by 2.5 and round to one decimal
# (presumably converting sodium to salt - TODO confirm).
for (gbd_col in c("gbd.val", "gbd.lower", "gbd.upper")) {
  faltantes[[gbd_col]] <- round(faltantes[[gbd_col]] * 2.5, 1)
}

comparison <- rbind(comparison, faltantes)
rm(faltantes)

# Predicted means ("Total" rows only), joined with income/region and with the
# global estimates; countries without a global estimate are kept (all.x).
data <- read.csv("~/Desktop/Artículos/STEPS/Supplementary materials/Mean_salt_intake_predicted_by_survey_in_54_LMICs_2021-11-29.csv")
# Joining with income and region
income <- read.csv("~/Desktop/Artículos/STEPS/Supplementary materials/Countries_by_income_2021-11-22.csv")
data <- merge(data, income, by = c("Country", "Year"))
data[["Mean.salt.intake..g.day."]] <- round(data[["Mean.salt.intake..g.day."]], 1)
data <- filter(data, Sex == "Total")
data <- merge(data, comparison, by = "Country", all.x = TRUE)
data <- select(data, Country, 2, 4:6, 11:14)

  #For text
# Working copy used only to flag, per country, whether the predicted 95% CI
# and the GBD interval overlap.
x <- data
x$gbd.lower <- as.numeric(x$gbd.lower)
x$gbd.upper <- as.numeric(x$gbd.upper)
# Two intervals overlap exactly when each one starts before the other ends.
# The previous test only asked whether a predicted bound fell inside the GBD
# interval, so it returned FALSE when the predicted CI fully contained the
# GBD interval. Rows with a missing GBD bound stay NA.
x$predictions_cross_gbd_estimates <-
  x$Mean.salt.intake..g.day..lower <= x$gbd.upper &
  x$Mean.salt.intake..g.day..upper >= x$gbd.lower

# Short working names for the nine columns (assigned by position).
names(data) <- c(
  "country", "year_prediction",
  "salt_predicted", "salt_predicted_lower", "salt_predicted_upper",
  "year_estimated",
  "salt_estimated", "salt_estimated_lower", "salt_estimated_upper"
)

# Coerce every salt column to numeric and round it to one decimal.
salt_columns <- c(
  "salt_predicted", "salt_predicted_lower", "salt_predicted_upper",
  "salt_estimated", "salt_estimated_lower", "salt_estimated_upper"
)
for (salt_col in salt_columns) {
  data[[salt_col]] <- round(as.numeric(data[[salt_col]]), 1)
}

# Agreement measures, computed on the rounded point estimates before they are
# turned into display strings.
data$ratio <- data$salt_predicted / data$salt_estimated
data$absolute_difference <- abs(data$salt_estimated - data$salt_predicted)

# Display strings: "estimate (lower-upper)".
data$salt_predicted <- paste0(
  data$salt_predicted,
  " (", data$salt_predicted_lower, "-", data$salt_predicted_upper, ")"
)
data$salt_estimated <- paste0(
  data$salt_estimated,
  " (", data$salt_estimated_lower, "-", data$salt_estimated_upper, ")"
)

names(data)
# Keep country, years, the two display strings and the agreement measures;
# keep only rows with a reported year for the global estimate (rows where
# that year is NA are dropped by filter() as well).
data <- select(data, 1:3, 6:7, 10:11)
data <- filter(data, year_estimated != "Not reported")
for (measure_col in c("ratio", "absolute_difference")) {
  data[[measure_col]] <- round(data[[measure_col]], 1)
}

names(data) <- c(
  "Country",
  "Year (for \nML predictions)",
  "ML-predicted mean salt \nintake and 95% CI (g/day)",
  "Year (for \nglobal estimatates)",
  "Estimated mean salt \nintake and 95% CI (g/day)",
  "Ratio between predicted and \nestimated mean salt intake",
  "Absolute difference between \npredicted mean salt intake and \nglobal estimated mean salt intake"
)

write.csv(
  data,
  paste0("~/Desktop/Artículos/STEPS/Supplementary materials/Comparison_predictions_vs_global_results_", Sys.Date(), ".csv"),
  row.names = FALSE
)









